import requests
import os
from bs4 import BeautifulSoup


# Scrape the Four Great Classical Novels of Chinese literature.

def get_html(main_url, timeout=10):
    """Download a page and parse it into a BeautifulSoup tree.

    Args:
        main_url: URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up. It is
            passed straight to ``requests.get``; without it a stalled
            connection would block the script indefinitely.

    Returns:
        A ``BeautifulSoup`` object built from the response text with the
        ``lxml`` parser.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: For connection failures and timeouts.
    """
    # Send a browser-like User-Agent with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'
    }

    resp = requests.get(main_url, headers=headers, timeout=timeout)
    # Fail loudly on an error status instead of silently parsing an error page.
    resp.raise_for_status()
    # Use the encoding detected from the body rather than the one assumed
    # from the HTTP headers when decoding resp.text.
    resp.encoding = resp.apparent_encoding
    return BeautifulSoup(resp.text, 'lxml')


def get_book(soup):
    """Print and return the title and absolute URL of every listed book.

    Looks up every ``<div class="book-item">`` in ``soup``. For each one that
    contains a link, the div's text (with newlines removed) is used as the
    book name and the link's ``href`` is resolved against the site root.

    Args:
        soup: Parsed page; must provide ``find_all`` the way a
            ``BeautifulSoup`` object does.

    Returns:
        A list of ``(book_name, book_href)`` tuples in page order. Items
        without an ``<a>`` tag or without an ``href`` are skipped.
    """
    from urllib.parse import urljoin

    base_url = 'https://www.shicimingju.com'
    books = []
    # `class_` (with the trailing underscore) is the keyword argument for the
    # HTML class attribute; plain `class` is a reserved word in Python.
    for item in soup.find_all('div', class_="book-item"):
        link = item.a
        href = link.get('href') if link is not None else None
        if not href:
            # No usable link in this item: skip it instead of crashing.
            continue
        # Book title: the item's text with line breaks removed.
        book_name = item.get_text().replace('\n', '')
        # urljoin handles relative hrefs (with or without a leading slash)
        # as well as hrefs that are already absolute.
        book_href = urljoin(base_url, href)
        print(book_name)
        print(book_href)
        books.append((book_name, book_href))
    return books


if __name__ == '__main__':
    # Listing page to scrape.
    main_url = 'https://www.shicimingju.com/bookmark/sidamingzhu.html'

    # Step 1: download and parse the listing page.
    listing_soup = get_html(main_url)
    # Step 2: print each book's name and link.
    get_book(listing_soup)
